# Print large numbers in fixed notation (e.g. 1000000 rather than 1e+06).
options(scipen = 999)
library(ggplot2)
data("midwest", package = "ggplot2") # load the midwest demographics data
# Explore the data.
# Plain function calls are used instead of `%>%`: the pipe comes from
# magrittr/dplyr, neither of which is attached here, so `midwest %>% dim`
# would stop with 'could not find function "%>%"'.
dim(midwest)
## [1] 437 28
head(midwest)
## # A tibble: 6 x 28
## PID county state area poptotal popdensity popwhite popblack
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int>
## 1 561 ADAMS IL 0.052 66090 1270.9615 63917 1702
## 2 562 ALEXANDER IL 0.014 10626 759.0000 7054 3496
## 3 563 BOND IL 0.022 14991 681.4091 14477 429
## 4 564 BOONE IL 0.017 30806 1812.1176 29344 127
## 5 565 BROWN IL 0.018 5836 324.2222 5264 547
## 6 566 BUREAU IL 0.050 35688 713.7600 35157 50
## # ... with 20 more variables: popamerindian <int>, popasian <int>,
## # popother <int>, percwhite <dbl>, percblack <dbl>, percamerindan <dbl>,
## # percasian <dbl>, percother <dbl>, popadults <int>, perchsd <dbl>,
## # percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## # percpovertyknown <dbl>, percbelowpoverty <dbl>,
## # percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## # percelderlypoverty <dbl>, inmetro <int>, category <chr>
Initialising a ggplot
# Initialise the plot: bind the data set and the x/y aesthetics.
# No geom layer is added here, so no points or lines are drawn yet.
ggplot(midwest, aes(x = area, y = poptotal))
Notes:
# Scatterplot: add a point layer (one point per row of `midwest`).
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point()
# Scatterplot plus a smoothing layer fitted with a linear model ("lm").
# The plot object is stored in `g` and then drawn explicitly.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")
plot(g)
The X and Y axis limits can be changed using xlim() and ylim().
# Rebuild the scatterplot with the linear fit.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")
# xlim()/ylim() drop the rows that fall outside the limits (see the warnings
# below), so both the points and the smoother only see the remaining rows.
g + xlim(c(0, 0.1)) + ylim(c(0, 1000000))
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_point).
# Equivalent call: the limits are passed as two numbers instead of a vector.
g + xlim(0, 0.1) + ylim(0, 1000000)
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_point).
Change the X and Y axis limits by zooming in to the region of interest without deleting the points. This is done using coord_cartesian().
# Rebuild the scatterplot with the linear fit.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")
# coord_cartesian() restricts the visible region (zoom) without removing
# any rows from the data.
g1 <- g + coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000))
plot(g1)
Here the line of best fit does not change, because coord_cartesian() only zooms the view: all the points are still used to fit the line.
Use the labs() function with the title, x and y arguments. Alternatively, we can use ggtitle(), xlab() and ylab().
# Zoomed-in scatterplot with the linear fit, used as the base for the labels.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")
g1 <- g + coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000))
# plot(g1)
# Option 1: title, subtitle, axis labels and caption in a single labs() call.
g1 +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
# Option 2: dedicated helpers for the title/subtitle and each axis label
# (no caption is set in this variant).
g1 +
  ggtitle("Area Vs Population", subtitle = "From midwest dataset") +
  xlab("Area") +
  ylab("Population")
Here’s the full function call
# Full call, variant 1: points, linear fit, zoomed view, and all text
# elements set through a single labs() call.
ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm") +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
# Full call, variant 2: the same plot built with ggtitle()/xlab()/ylab(),
# plus labs() for the caption. The subtitle text matches variant 1 exactly
# (it previously differed in capitalisation and had a trailing space).
ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm") +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  ggtitle(label = "Area Vs Population", subtitle = "From midwest dataset") +
  xlab(label = "Area") +
  ylab(label = "Population") +
  labs(caption = "Midwest Demographics")
We can change the aesthetics of a geom layer by modifying the respective geoms.
# Fixed (non-mapped) aesthetics are set directly on each geom: every point is
# steel blue at size 3, and the fitted line is drawn in firebrick.
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(col = "steelblue", size = 3) +
  geom_smooth(method = "lm", col = "firebrick") +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
If we want the color to change based on another column in the source dataset, it must be specified inside the aes() function.
# Map point colour to the `state` column by putting `col` inside aes();
# size and the smoother's colour stay fixed. The plot is kept in `gg`.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
plot(gg)
The legend is added automatically. If needed, it can be removed by setting legend.position to "none" from within a theme() function.
# Remove the legend. The documented value is lowercase "none"; the
# capitalised "None" is not one of the values listed for legend.position.
gg + theme(legend.position = "none")
Also, you can change the color palette entirely.
# Switch the colour scale to the "Set1" brewer palette.
gg + scale_color_brewer(palette = "Set1")
More such palettes can be found in the RColorBrewer package.
Changing the axis ticks involves two aspects: breaks and labels.
Step 1: Set the breaks
# Base plot: points coloured by state, linear fit, zoomed view, labels.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
# Place an x-axis tick every 0.01 between 0 and 0.1.
gg + scale_x_continuous(breaks = seq(0, 0.1, by = 0.01))
Step 2: Change the labels
Change the labels at the axis ticks. The labels argument takes a vector of the same length as breaks.
# Base plot: points coloured by state, linear fit, zoomed view, labels.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
# Set the breaks and label them: the 11 breaks (0, 0.01, ..., 0.1) are
# labelled with the first 11 lowercase letters, one label per break.
gg + scale_x_continuous(
  breaks = seq(0, 0.1, by = 0.01),
  labels = letters[1:11]
)
If you need to reverse the scale, use scale_x_reverse().
# Base plot: points coloured by state, linear fit, zoomed view, labels.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
# Flip the direction of the x axis.
gg + scale_x_reverse()
Method 1: Using sprintf()
Method 2 : Using a custom defined function
# Base plot: points coloured by state, linear fit, zoomed view, labels.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  )
# Customise the tick label text:
#   x axis - labels are pre-formatted with sprintf() (two decimals plus "%");
#   y axis - labels are produced by a function applied to the break values
#            (value divided by 1000 with a "K" suffix).
gg +
  scale_x_continuous(
    breaks = seq(0, 0.1, by = 0.01),
    labels = sprintf("%1.2f%%", seq(0, 0.1, by = 0.01))
  ) +
  scale_y_continuous(
    breaks = seq(0, 1000000, by = 200000),
    labels = function(x) paste0(x / 1000, "K")
  )
# Base plot, including x-axis breaks every 0.01.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    x = "Area",
    y = "Population",
    caption = "Midwest Demographics"
  ) +
  scale_x_continuous(breaks = seq(0, 0.1, by = 0.01))
# Method 1: theme_set() changes the session-wide default theme, so it
# applies to plots drawn after this call.
theme_set(theme_classic())
plot(gg)
# Method 2: add a theme as a layer of one specific plot.
gg + theme_bw() + labs(subtitle = "BW Theme")
gg + theme_classic() + labs(subtitle = "Classic theme")